/* 
   This program imports the CART imputation .csv files for the FHS industries (except concrete, which has its
   own program, and sugar, for which we are not creating imputations) into SAS,
   merges the PPN and id (firm id), and saves the merged files.

   Note that for flooring, I dropped the year 1997 observations to get the CART impute script to work, so 
   the year 1997 observations will be missing when I merge the imputes with the IDs. 

*/

/*   July 23, 2014: Modified to include entry dummy (lbirth) from new imputations. */
/*   April 9-13, 2015: Modified to import different set of variables (no lbirth) from new imputations. */

%include "ASMimplibs.sas";


%MACRO import_imputes(ind);

  /*
     import_imputes(ind)

     Reads the CART imputation file "&ind._imputes.csv" (comma-delimited, first
     line skipped as a header row) into the data set WORK.&ind._imputes.

     Parameter:
       ind - industry prefix used to build both the CSV file name and the
             output data set name.

     The file name is given without a directory, so it is resolved relative to
     the current working directory of the SAS session.

     NUMBER is read as a 5-character string and FDEATH as a 1-character string;
     every other column is read as numeric.

     The macro variable _EFIERR_ is set to 1 when any record raises _ERROR_
     while it is being read.  A WARNING is written to the log in that case so
     that a bad import does not go unnoticed.
  */

  %let _EFIERR_ = 0;  /* reset the error-detection flag before reading */

  data WORK.&ind._imputes;
    infile "&ind._imputes.csv" delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2;

    /* Variables are declared in the same order as the CSV columns. */
    informat number $5.
             YEAR TVS TE PH SW PQS kseq ksst best32.
             fdeath $1.
             plant_cm energycmratio cpcmratio wwswratio pvtvsratio impsetnum best32.;

    format   number $5.
             YEAR TVS TE PH SW PQS kseq ksst best12.
             fdeath $1.
             plant_cm energycmratio cpcmratio wwswratio pvtvsratio impsetnum best12.;

    input number $
          YEAR TVS TE PH SW PQS kseq ksst
          fdeath $
          plant_cm energycmratio cpcmratio wwswratio pvtvsratio
          impsetnum;

    /* _ERROR_ is reset on every iteration, so latch it into the macro flag. */
    if _ERROR_ then call symputx('_EFIERR_', 1);
  run;

  /* Surface the flag; previously it was set but never inspected. */
  %if &_EFIERR_ %then
    %put WARNING: import_imputes(&ind): at least one record of &ind._imputes.csv set _ERROR_ while being read.;

%MEND ;



/* Import the CART imputes for each industry.
   The box call is disabled with a block comment rather than an asterisk-style
   comment: the macro processor still expands a macro call that sits inside an
   asterisk-style comment, which would push the generated statements into
   open code. */
%import_imputes(bred);
/* %import_imputes(box); */
%import_imputes(carbon);
%import_imputes(coff);
%import_imputes(flr);
%import_imputes(gas);
%import_imputes(iceb);
%import_imputes(icep);
%import_imputes(ply);



%MACRO merge_ids(ind);

  /*
     merge_ids(ind)

     Attaches the identifiers (ppn, id) and pqs_imp_mean from
     fhs7797.&ind.f_gooddata_ids to the imported imputes in WORK.&ind._imputes,
     matching on NUMBER, and writes the result to WORK.&ind._imputes_after_merge
     with impsetnum renamed to _IMPUTE_.

     It then compares the merged file with db50im.all&ind.f on (ppn, year) and
     prints, by year, the PPNs that are in db50im.all&ind.f but have no merged
     impute.

     Parameter:
       ind - industry prefix used in every data set name.

     Side effects: WORK.&ind._imputes is rewritten in place (NUMBER becomes
     numeric and the data set is sorted by NUMBER); the data sets &ind._ids,
     CB&ind, test&ind and rejCB&ind are created in WORK.
  */

  /* NUMBER was imported as a character variable; build a numeric copy.
     Presumably number is numeric in the ids file - confirm.
     The KEEP= list drops the original character NUMBER. */
  data &ind._imputes (keep = NUMBER_NUM YEAR TVS TE PH SW PQS kseq ksst fdeath plant_cm
                             energycmratio cpcmratio wwswratio pvtvsratio impsetnum);
    set &ind._imputes;
    NUMBER_NUM = input(NUMBER, 5.);
  run;

  /* Give the numeric copy the name NUMBER so it can serve as the merge key. */
  proc datasets library=WORK;
    modify &ind._imputes;
    rename NUMBER_NUM = number;
  run;
  quit;

  proc sort data=&ind._imputes;
    by number;
  run;


  /** Merge ids **/

  data &ind._ids (keep = number year ppn id pqs_imp_mean);
    set fhs7797.&ind.f_gooddata_ids;
  run;

  proc sort data=&ind._ids;
    by number;
  run;

  /* Keep only the NUMBERs present in both files.  YEAR exists in both inputs;
     for matched rows the value from &ind._imputes (listed last) is the one kept. */
  data &ind._imputes_after_merge;
    merge &ind._ids (in=inids) &ind._imputes (in=inimp);
    by number;
    if inids and inimp;
  run;

  proc datasets library=work;
    modify &ind._imputes_after_merge;
    rename impsetnum = _IMPUTE_;
  run;
  quit;


  /* Check to make sure we got all the PPNs matched up correctly:      */
  /* Note: rejCB&ind should be empty EXCEPT FOR YEAR 1997 FLOORING.    */

  data CB&ind (keep = ppn year);
    set db50im.all&ind.f;
  run;

  /* Both inputs of the check merge must be ordered by the full BY list
     (ppn year).  The merged imputes used to be sorted by ppn alone, which
     does not guarantee the year order that the BY statement below requires. */
  proc sort data=CB&ind;
    by ppn year;
  run;

  proc sort data=&ind._imputes_after_merge;
    by ppn year;
  run;

  /* test&ind  : (ppn, year) pairs found in both files.
     rejCB&ind : (ppn, year) pairs in db50im.all&ind.f with no merged impute. */
  data test&ind rejCB&ind;
    merge &ind._imputes_after_merge (in=inCART) CB&ind (in=inCB);
    by ppn year;
    if inCART and inCB then output test&ind;
    else if inCB then output rejCB&ind;
  run;

  proc sort data=rejCB&ind;
    by year;
  run;

  proc freq data=rejCB&ind;
    tables ppn;
    by year;
    title1 "PPNs in original gooddata for &ind";
    title2 "that did not match after merging PPNs to CART imputes";
  run;

%MEND;



/* Merge the ids onto the imputes for each industry.
   The boxes call is disabled with a block comment rather than an asterisk-style
   comment: the macro processor still expands a macro call that sits inside an
   asterisk-style comment, which would push the generated statements into
   open code.
   NOTE(review): the disabled import call names this industry "box", not
   "boxes" - reconcile the prefix before re-enabling. */
/* %merge_ids(boxes); */
%merge_ids(bred);
%merge_ids(carbon);
%merge_ids(coff);
%merge_ids(flr);
%merge_ids(gas);
%merge_ids(iceb);
%merge_ids(icep);
%merge_ids(ply);


/* Copy each merged impute file from WORK to the permanent fhs7797 library
   as fhs7797.<ind>_imputes.  The box industry is currently not saved. */
%macro save_imputes(ind);
  data fhs7797.&ind._imputes;
    set &ind._imputes_after_merge;
  run;
%mend save_imputes;

/* %save_imputes(box); */
%save_imputes(bred);
%save_imputes(carbon);
%save_imputes(coff);
%save_imputes(flr);
%save_imputes(gas);
%save_imputes(iceb);
%save_imputes(icep);
%save_imputes(ply);
